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^include <stdio.h> 



// Number of bases kept clear at each end of a probe when SecondStruct()
// chooses the point about which the probe is folded.
#define MATCH_NEEDED_IN_2ND 3

// Probe length in bases. Recompile when changed.
#define LEN_MER 8

// Forward declarations of the probe-screening helpers defined below main().
int SecondStruct(const char *str);
int CrossHyb(const char *str1, const char *str2, int overlap_length);
int SimpleMatch(const char *str1, const char *str2);
char FirstN(const char *str1, const char *str2, int N);

// Extra score added by CrossHyb() for every matching G/G or C/C pair
// (command line: -gc_add).
int GC_ADDITION = 1;
// Required number of G or C bases in an accepted probe (command line: -gc).
int NUM_GC = 4;

// SecondStruct() rejects a probe whose fold score reaches this value
// (command line: -2). Three consecutive matches score 1+2+4.
int SCORE_NEEDED_IN_2ND = 7; // 1+2+4
// Optional pair of bases treated as equivalent in the compatibility tests
// (command line: -eq).
char eq1, eq2;

// Report stream and log stream; main() redirects both when -o is given.
FILE *fp = stdout;
FILE *fplog = stderr;

main(int argc, char ♦♦argv) 

{ 

int ii, jj, comp_score, s; 

intMM[LEN_MER]; 

char line[256], strtLEN_MER+l]; 

int gcsum, pass _jc, total jDrobes, failed^fh, failed^ch, failed_sm; 
char convert[43; // 0123 to atcg convertion. 
char '^compatible; 

int max_j)rb, cnt jrb; 
char * ♦probe; 

int max_snd, cnt_snd; 
char **sndstr; 
int *snd_matchcnt; 

int SIMPLE_CUTOFF = 5 ; // reject if this many bps match to each other, 

// no matter where they are located, 
mt CROSSHYB CUTOFF « 9; // 1+2+4 + 2 
int CROSSHYbIoVERLAP = 5; 



intFIRST^N«4; 




eql = eq2 = "; 



{ 




fprintfl[stderr, 

"Usage: %s -o output_ffle[stdout]\n", 

argy[0]); 
fprintf{stderr, 

"\t\t-gc number_of.GCs_in_probe[%d]\n", 

NUM^GC); 
^rint^stderr, 

"\t\t-2 secondary_stracturej«ject(includ2ng this value)[%d]\n", 

SC0RE_NEEDe).IN_2ND); 
^rintf(stderr, 

"\t\t-ch crosshyb_reject(includmg this value)[%d]\n", 

CROSSHYB.CUTOFF); 
§)rintf(stderr, 

"\t\t-sm simple match_reject(including this value)[%d]\n", 

SIMPLE^CUTOFF); 
fprintfi[stderr, 

''\t\t-oI crosshyb_overlapJength[%d]\n", 

CROSSHYB^OVERLAP); 
^rintf(stderr, 

"\t\t-eq add'l_cquiv__bp_in_compat_checking[%c%c] (e.g., -eq gt)\n", 

eql,eq2); 

^rintf(stderr, "\t\t.fn first^N Jength[%d]\n", FIRST^N); 
fprintf(stderT, "\t\t-gc_add GC^adtf 1 j)enalty[%d]\n", GC^ADDITION); 

exit(O); 

} 

// parse input parametos. 
ii=l; 

while(ii < argc) 
{ 

if(strcmp(argv[ii], "-gc") = 0) 
sscanf(argv[ii+I], "Vod", &NUM_GC); 

else if(strcmp(argv[ii3, "-2") = 0) 
sscanf(argv[ii+I3. "%d", &SC0RE_NEEDED_IN_2ND); 

else ifl[strcmp(argv[ii3, "-ch") = 0) 
sscanf{argv(ii+l], "%d", &CROSSHYB_CUTOFF); 

else if(stromp(argv[ii], "-ol") = 0) 
sscanfl:argv[ii+l], "%d", &CROSSHYB_OVERLAP); 

else ifi(strcmp(argv[ii], "-eq") = 0) 

' FIG. 16B 



if(strlen(argv[u+13) = 2 H 
(strien(argv[u+lD = 3 && argv[u+l][2] — "Nn)) 

^ eql = argv[ii+l][0]; 
eq2 = argv[ii+ll[l]; 

} 

else 

^ fprintf(stderr.niiERROR: Invalid string after -eqflag.\n« 
exit(l); 

} 

} 

else if(strcmp(argv[u], "-o") = 0) 

= fopen(argv[ii+l]. "w")) - NULL) 

^fprintf(stdcrT, "Can't open file •^s to vmtc\n\ argv[ii+l]); 
exit(l); 

} 

char logname[128]; 

sprintfOogname, "%s.log", argy[ii+l])J 

if((ft)log = fopenOogname. "w")) =NULL) 

^ Q,rintf(stden, "Med creating log. stderruseANn"); 
^log « stdeir, 

} 

} 

else ifi[strcmp(argv[ii], "-&") — 0) 
^ sscanf(argv[ii+13."%d".&FIRST_N); 
} 

else if(strcmp(argv[Ul, "-sm") = 0) 
^ sscan^argv[ii+13."%d".&SIMPLE_CUT0FF); 
} 

else if(strcmp(aigvru], "-gc.add") = 0) 
^ sscanf(argv[ii+l]/%d",&GC.ADDITION); 
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else 



^rintf(stderr, "Unknow flag %s\n'', argv[ii]); 
exit(I); 

} 

} 

maxjprb = 30000; 
probe = new char* [maxjirb]; 
for(ii = 0; ii < inax_prb; ii++) 
probepi] = new char [LEN_MER+1]; 
cntjjrb = 0; 

max_snd = 5000; 
sndstr = new char* [max_snd]; 
for(ii = 0; ii < niax_snd; ii++) 
sndstrpi] = new char [LEN_MER+i]; 
snd_matchcnt = new int [niax_snd]; 
cnt_snd = 0; 

// build an array of probes. Each probe is of length LEN__MER, 
// of which "NUM.GC' are Gs or Cs. 

convert[0] = 'a'; 
convert[lj = 't'; 
convert[23 = 'c'; 
convert[3] = 'g'; 

total_probes = 0; 

passage = 0; // number of probes pass GC test 
for(MM[0] = 0; MM[0] < 4; MM[0]-h-) 
for(MM[l] = 0; MM[1] <4; MM[1]-h-) 
for(MM[2] = O; MM[2] < 4; MM[2]++) 
for(MM[3] = 0; MM[3] < 4; MM[3]-h-) 

foi(MM{4] « 0; MM[4] < 4; MM[4>H-) //*thin«s to pay attention to. 
for(MM[5] = O; MM[53 < 4; MM[5]-H-) //*things to pay attention to. 
for(MM[6] = O; MM[63 < 4; MM[6]++) //*things to pay attention to. 
for(MM[7i = O; MM[7] < 4; MM[7]++) //♦things to pay attention to. 
{ 

totaljprobcs++; 
gcsum - 0; 

//build a probe. 




forOj = 0; jj < LEN MER; jj++) 
{ 

strDj] = convert[MM(ij]]; 
if(str[ij3 = 'c'||strDj] = 'gO 
gcsum-H-; 

} 

str[LEN_MER]»'\0'; 

// check its GC contents and secondaiy structure. 

ifllgcsum == NUM_GC) 

{ 

pass^c++; 

Q)rintfl;i5)log, "pass GCtest: %s\n", str); 

ifi[!Secon<lStruct(str)) 

strcpy(probe[cnt_j»rb], str); 
if(++cntj)rb =» max_prb) 
{ 

// should relocate memory. 

// To simplefy the program, let's just give an error msg. 

^rintf|[stderr, "ERROR: Probe array is too small. cnt_prb is %d\n", cnt_prb); 

exit(l); 

} 

} 

else 

{ 

// record the rejected string 
strcpy(sndstr[cnt_snd], str); 
ifi^++cnt snd==max snd) 
{ 

Qnint^stderr, "ERROR: Secondary Structure array is too small. cnt_snd « %d\n", 

cnt__snd); 

exit(l); 

} 

} 

} 



J^rintfl[Q3, ''\n%d mer probe selectionNn", LEN_MER); 
fyrintf(4, "Number of GCs in the probes: %d\n", NUM_GC); 
fyrintf(^, "Score to r^ect as secondary structure: %d\n'', 

SC0REJ^EEDED_IN_2ND); 
fynnO^fp, "Score to reject ^B incompatible: %d\n", CROSSHYB_CUTOFF); 
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fprintf(^, "Compatible test overlap: %d\n", CROSSHYB^OVERLAP); 
fprintf(§), "Additional equivalent base-pair in compatibility checking: %c%c\n", 
eql,eq2); 

fprintf(fp, "Simple match cutoff value(including): %d\n", SIMPLE^CUTOFF); 
^rintf(^), "First N value(including): %d\n", FIRSTJ^; 
fyvint^^, "Additional penalty for G or C: %d\n", GC^ADDITION); 
fynaxfify, "\n\n"); 

fprintf(fp, Total possible %d mers: %d\n", LEN^MER, total jrobes); 

^rintf(^, "Number passed GC_test : %d\n", passage); 

fi)rintf(fi), 'dumber passed secondary structure test : %d\n", cnt jrb); 
// for(ii = 0; ii < cnt_snd; ii-H-) 
// fprintf(fi), "%s\n", sndstrpi]); 



// From the set (call it setl) of probes which passed GC and 2nd structure 
// tests, choose a probe into the final set(set2). Then compare this 
// probe against all the probes left in setl and throught out the ones 
// that may crosshyb to this probe. From what's left in setl, choose 
// another probe and compary it to the rest of setl... 

compatible = new char [cntjrb]; 
for(ii - 0; ii < cnt j>rb; ii++) 
{ 

compatible[ii] = T; 

} 

// Compatibility check #1: Use weighted scores to penalize neighboring matches. 
// f5rst_match_score « I ; 

// if prev pair is a match, currem_match_score = prev_jnatch_score*2. 
ii = 0; 

failed^ch = 0; 
while(ii < cntjrb) 

{ 

forQj = ii+1 ; jj < cntjrb; jj-H-) 
{ 

if^compatibleDj] = T && 
(s<:rossHyb(probe[ii],probe|j}],CROSSHYB_OVERLAP)) >- CROSSHYB.CUTOFF) 



compatible[jj] « T'; 
failed_ch++; 

§)rintf(^log, "Rejected(%d) %s in slide test for %s\n". 



s, probeOj], probe[ii]); 






ii++; 

mWIcCu < ciit_prb && con^blepi] TP) 
U++; 




fprintf(Q)» "Number of probes passed compatibility test: %d\n", 
cntjwb - failed_ch); 

// Compatibility check #2: Use unweighted score: count unconsecutive matches 

// find the first *passed' probe. 

ii = 0; 

while(ii < cntjjrfa && compatiblepi] = T') 
ii-H-; 

fidled_sm = 0; 
while(ii <cntjjib) 
{ 

forQj = jj < cnt_prb; 
{ 

if(compatible[ij] = T && 
(s=SimpleMatch(probe[ii3,probe[ij])) >= SIMPLE_CUTOFF) 

{ 

compatibleOj] = 'F; 

fyrintf(^log, "Rejected(%d) %s in simple^match test for %s\n". 

s, probeQj], probefii]); 
Med sm-H-; 

} 



ii-H-; 

while(ii < cnt jrb && compatiblepi] = T*) 
ii++; 

} 

§>rintf(fp, "Number of probes passed simple match test: %d\n", 
cnt jrb - Med_ch - failed_sm); 



// Compatibility check #3: if the furst N bases match ANYWHERE in another probe. 

// find the first 'passed' probe. 
ii = 0; 

while(ii < cnt jprb && compatible[ii] = T*) 
ii^H-; 

feiled_fii = 0; 



iAdule(ii < cntjjrb) 




' Ill' ''III' fill " ifjff 'ifif 



{ 

for(ij = ii+1 ; jj < cntj)rb; 
{ 

if(compatible|]j] = T && 
FirstN(probe[ii], probeQj], FIRST N) = T) 
{ 

compatibleQj] = T'; 
fjdled_fi»++; 

^jrint^fylog, "Rejected %s in FIRSTN test for %s\n", 
probeQj], ]sobeLii]); 

} 

} 

\(*ac(ii < cnt j)rb && compatible[ii] = 
U++; 

} 

^rintf(Q), "Number of probes passed FIRSTN compatibility test: %d\n", 
cnt j>rb - Medj^ - Med_sm - failed_fii); 

// output 

fprintf(fp, "\nSelected probes are: \n"); 

for(ii = 0; ii < cnt_prb; ii-H-) 

{ 

if(compatibIe[ij] = T) 

( 

fprintf(fp. -yos \n", probepi]); 

jj-H-; 

} 

} 

} 



// Check if 'str* contains a secondary structure. That is, if there is a 
// consecutive 3 bases that matches 'str* is folded. 
// return I if found secondary structure, 0 otherwise. 

int SecondStruct(const char •str) 
{ 

intii,ij,kk,ll; 
int sum, score[32]; 
charpreyjnatch; 
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char complement[256]; 
complementra*] ¥ ; 
complementft'] - 'a'; 
complementfc'] = 'g'; 
complementrg*] = 'c'; 

II = strlen(str); 

compl = new char pi+1]; 

for(u«0;u<n;u++) 

{ 

complpi] « compIcment[str[ii]]; 

} 

for(ii = MATCH NEEDED IN.2ND; ii < 11 - MATCH_NEEDED IN_2ND; 
{ 

prev^match = *F; 
sum = 0; 

for(ij«0;ij<u;jj++) 
{ 

score[ii] = 0; 
kk = ii*2-ij; 
if(kk<ll) 

{ 

if(str[ij] = compl[kk]) 

{ 

if(prev match = T) 
{ 

scoreCJ] - scoreQj-l] * 2; 

} 

else 

{ 

score(jj] = 1; 
prev match = T; 

} 

} 

else 
{ 

prev match-T'; 

} 

} 

sum += scoreQj]; 

} 

// fyrintf(stderr, "2' sum = %d\n", sum); 
if(sum >= SC0RE_NEEDED_IN_2ND) 
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{ 

delete Q compl; 

return 1; // Found a 2nd structure. 

} 

} 

Mii = MATCH_NEEDED.IN_2ND - 1; u < U - MATCH_NEEDED.IN.2ND; ii^^) 
{ 

prev_match = 'F; 
suni = 0; 

for(jj=0;jj<=a;jj++) 
{ 

score[jj] = 0; 
kk = ii*2+l-jj; 
if(kk<li) 

^ if(str[ij] = compl[kk]) 

ifli)rev_match = T) 

^ scoreOj] = score[ij-l]*2; 
} 

else 
{ 

score[ij] = 1; 
prev_match = T; 

} 

} 

else 

prev_match = T'; 

} 

} 

sum += scoreQj]; 

} 

// fprintf(stden, "2* sum » %d\n", sum); 
i^sum >« SC0RE_NEEDED_IN_2ND) 

{ 

delete Q compl; 

return 1; // Found a 2nd structure. 

} 



} 

delete Q compl; 
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return 0; // No 2nd structure. 

} 



// check if strl and str2 can hybridisy together. 
// return the max of match scores. 
// Assume strlen(strl) strlen(str2). 

mt CrossHyb(const char *strl , const char *str2, int overlap) 
{ 

int ii, jj, len, sum, score, prey_score, max^sum, numGC; 
char prev_match; 

len - strlen(strl); 
max_sum - 0; 

lprintf(§>log, "Sliding test between %s and %s\n", strl, str2); 

for(ii « overlap-len; ii len-overlap; ii-H-) 
{ 

numGC = 0; 
sum = 0; 

score = prev_score = 0; 

prev_match = 'F; 

Q>rintfi[iplog, "Compare "); 

for(j|j = ii; jj < len && Jj - ii < len; jj++) 

{ 

ifOj>=0&&ij.ii>=0) 



if(({strl [ij]|32) == -g- && (str2Q}-ii]132) = 'g') |1 
((strl [ij]|32) = 'C && (str2[jj-u]|32) = 'c')) 
numGC-H-; 

if(prev match = T) 
{ 

Score = iwev_score*2; 



^rintf(^log, "e/ocyoc) strlQj], str2ai-u]); 
if{(strlQjl — str2[ij-ii])|| 



(strlOj] eql && str2Di-u] = eq2) !| 
(StrlQj] = eq2 && str2[ij-ii] = eql)) 



} 

else 

{ 



score = 1; 




prev match = T; 

} 

} 

else 
{ 

score =0; 
prevjnatch = T; 

} 

sum += score; 
prev_score = score; 

} 

} 

^rintfi[fplog, "Score^/odW, sum + numGC*GC_ADDmON); 

if(sum + numGC*GC_ADDITION > max_sum) 
max_sum = sum + numGC*GC_ADDITION; 

} 

§)rintf(§)log, "Max score is %d\n", max_sum); 
return max_sum; 

} 

// Compare 2 strings base to base, 0 to 0, 1 to 1 no sliding. 

// return number of matches. 

// Assume strlen(strl) = strlen(str2). 

int SimpleMatcb(const char ^strl, const char *str2) 
{ 

int ii, sum; 
sum = 0; 

foKii = 0; ii < strlen(strl); u++) 
{ 

if((striai] — str2[ii])|| 
(strl[ii] == eql && str2[ii] = eq2) || 
(strl [ii] = eq2 && str2[ii] = eql)) 

{ 

sum++; 

} 

} 

return sum; 

FIG. 16L 



// Check if the first N bases of the two iwobes are identical, 
char FirsiN(const diar *strl, const char *str2, int N) 

{ 

intii; 

char match = T; 

if(N > strlen(strl)) 
return 'P; 

for(ii-0;ii<N;u++) 
{ 

if(!((strl[ii] = str2[ii])|| 
(strl[u] = eql && strZpi] = eq2) || 
(strlpi] eq2 && str2[ii] — eql))) 

{ 

match = 'F; 
■ break; 
} 

} 

return match; 
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Starting Perturbagen Library in
cells grown in dextrose = non-
expressing conditions



Introduce ID Tagged Perturbagen library into
cells and express the perturbagen
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Each cell expresses a
different perturbagen.
Most have no effect...
some kill



Reisolate plasmid library after passage from induced 
and non induced grown cells. Keep libraries separate. 




T7 transcribe ID Tags in passaged 
"Dextrose" library and "Galactose" Library 
to generate mg quantities of ID Tags as 





FITC label Starting 
Library RNA 



Rhodamine label 
Passaged Library 





Combine and hybridize RNA from 
libraries to ID Tag Beads 




Sequence and clone Perturbagen 
DNA back into expression construct 
and validate effect 
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Pert 
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Use specific ID-Tag primer and a 
common vector primer juxtaposed 
to perturbagen to amplify 
corresponding perturbagen 
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Sequence ID Tag and 
generate complement oligo 



RT-PCR amplify RNA off beads
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Sort single FITC+ beads 
into 96 well PCR Plate
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FITC Fluorescence



Sort all Beads on Cell sorter to identify Beads 
labeled only with FITC+ RNA 
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